In [1]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
# import seaborn as sns 
import datetime
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from plotly.graph_objs import Scatter, Figure, Layout
import plotly
import plotly.graph_objs as go
import plotly.express as px
from IPython.display import Markdown as md
# Initialise plotly's notebook mode before any figure is shown.
# NOTE(review): connected=False presumably bundles plotly.js with the notebook
# rather than loading it from a CDN — confirm against the plotly documentation.
init_notebook_mode(connected=False)
import io
import requests
import re

COVID-19 in Italy. Visuals


(alternatively, see results and code together here)

 


Data source: this GitHub page

Authors and sources mentioned: Editore/Autore del dataset: Dipartimento della Protezione Civile. Categoria ISO 19115: Salute. Dati forniti dal Ministero della Salute.

Regional data files (Dati per Regione):
  • Struttura file giornaliero: dpc-covid19-ita-regioni-yyyymmdd.csv (dpc-covid19-ita-regioni-20200224.csv)
  • File complessivo: dpc-covid19-ita-regioni.csv
  • File ultimi dati (latest): dpc-covid19-ita-regioni-latest.csv

 

In [2]:
# Scrape the per-region table from the Italian Wikipedia page on regions.
URL = 'https://it.wikipedia.org/wiki/Regione_(Italia)'
res = requests.get(URL, timeout=30)
# Fail loudly on an HTTP error instead of trying to parse an error page.
res.raise_for_status()
# Wrap the markup in a file-like object: recent pandas versions deprecate
# passing a literal HTML string to read_html.
tables = pd.read_html(io.StringIO(res.text))
# NOTE(review): index 13 is tied to the page layout at the time of writing.
# The selected table must contain the 'Regione' and 'Popolazione (ab.)'
# columns used further down — re-check if the page changes.
dt = tables[13]
In [3]:
def dewhite(x):
    """Return the digits of ``x`` joined into a single string.

    Every non-digit character (spaces, non-breaking spaces, dots, letters)
    is dropped, so ``'10 060 574'`` becomes ``'10060574'``. A string with no
    digits yields ``''``.

    Parameters
    ----------
    x : str
        Text containing a number written with separators.

    Returns
    -------
    str
        The digits of ``x`` in their original order.
    """
    # The original computed this value but never returned it.
    return ''.join(re.findall(r'\d+', x))

# Keep only the region name and its population, under short English names.
dt2 = dt[['Regione', 'Popolazione (ab.)']].copy()
dt2.columns = ['Region', 'Pop']

# The population is scraped as text with separators between digit groups
# (presumably spaces — verify against the page). Keep the digits only and
# convert to int. The raw-string pattern avoids the invalid-escape warning
# that '\d+' raises on current Python versions.
dt2['Pop'] = dt2['Pop'].str.findall(r'\d+').str.join('').astype(int)
In [4]:
# Download the complete per-region time series (the "File complessivo"
# listed in the data-source notes above).
DATA_URL = "https://raw.githubusercontent.com/pcm-dpc/COVID-19/master/dati-regioni/dpc-covid19-ita-regioni.csv"
response = requests.get(DATA_URL, timeout=30)
# Stop here on an HTTP error rather than parsing an error page as CSV.
response.raise_for_status()
s = response.content
dat = pd.read_csv(io.StringIO(s.decode('utf-8')))
# 'data' is read as text timestamps such as 2021-02-14T17:00:00, so the
# lexicographic maximum is also the most recent one.
cdate = dat['data'].max()

md("Currently data as of date: {}".format(cdate))
Out[4]:

Currently data as of date: 2021-02-14T17:00:00


 

What's in the original dataframe?

In [5]:
# Show every column label of the raw frame as rendered Markdown.
md("All column names: {}".format(list(dat.columns)))
Out[5]:

All column names: ['data', 'stato', 'codice_regione', 'denominazione_regione', 'lat', 'long', 'ricoverati_con_sintomi', 'terapia_intensiva', 'totale_ospedalizzati', 'isolamento_domiciliare', 'totale_positivi', 'variazione_totale_positivi', 'nuovi_positivi', 'dimessi_guariti', 'deceduti', 'casi_da_sospetto_diagnostico', 'casi_da_screening', 'totale_casi', 'tamponi', 'casi_testati', 'note', 'ingressi_terapia_intensiva', 'note_test', 'note_casi', 'totale_positivi_test_molecolare', 'totale_positivi_test_antigenico_rapido', 'tamponi_test_molecolare', 'tamponi_test_antigenico_rapido', 'codice_nuts_1', 'codice_nuts_2']

In [6]:
# Italian source column -> English name used in the rest of the notebook.
# Renaming by label (instead of assigning a positional list of names) keeps
# working if the upstream CSV gains or reorders columns. Columns not listed
# here keep their original names.
COLUMN_RENAMES = {
    'data': 'Date',
    'denominazione_regione': 'Region',
    'lat': 'Lat',
    'long': 'Long',
    'ricoverati_con_sintomi': 'HospWithSymptoms',
    'terapia_intensiva': 'IC',
    'totale_ospedalizzati': 'HospTotal',
    'isolamento_domiciliare': 'AtHome',
    'totale_positivi': 'CurrentlyPositive',
    'variazione_totale_positivi': 'VariationOfPositives',
    'nuovi_positivi': 'NewPositives',
    'dimessi_guariti': 'Recovered',
    'deceduti': 'Deaths',
    'casi_da_sospetto_diagnostico': 'Diagnostico',
    'casi_da_screening': 'Screening',
    'totale_casi': 'TotalCases',
    'tamponi': 'NoOfTests',
    'ingressi_terapia_intensiva': 'ingr_ter_intens',
}

df = (
    dat.drop(columns=['stato', 'codice_regione'])
       .rename(columns=COLUMN_RENAMES)
)

# Attach the population of each region. This is an inner join, so any region
# whose name is spelled differently in the two sources is dropped.
# NOTE(review): check that no region is lost here.
df = pd.merge(df, dt2, on='Region')

# Keep a plain calendar date in the 'Date' column.
df['Date'] = pd.to_datetime(df['Date']).dt.date

# Make the row order explicit (region by region, chronological within each
# region) instead of relying on whatever order the merge happens to return;
# later cells use order-dependent operations such as shift and rolling.
df = df.sort_values(['Region', 'Date'])

# Index by date as a DatetimeIndex while keeping the 'Date' column too:
# later cells filter on the index and plot the column.
df = df.set_index(df["Date"])
df.index = pd.to_datetime(df.index)

# Negative daily counts are turned into their absolute value.
# NOTE(review): presumably these are retroactive corrections in the source
# data — confirm that taking the magnitude is the intended treatment.
df['NewPositives'] = np.abs(df['NewPositives'])

# Display the last rows of the raw (untranslated) data for reference.
dat.tail(5)
Out[6]:
data stato codice_regione denominazione_regione lat long ricoverati_con_sintomi terapia_intensiva totale_ospedalizzati isolamento_domiciliare ... note ingressi_terapia_intensiva note_test note_casi totale_positivi_test_molecolare totale_positivi_test_antigenico_rapido tamponi_test_molecolare tamponi_test_antigenico_rapido codice_nuts_1 codice_nuts_2
7492 2021-02-14T17:00:00 ITA 19 Sicilia 38.115697 13.362357 1030 165 1195 33671 ... NaN 5.0 NaN NaN 145744.0 0.0 1630116.0 485565.0 ITG ITG1
7493 2021-02-14T17:00:00 ITA 9 Toscana 43.769231 11.255889 693 130 823 11342 ... NaN 6.0 NaN NaN 143070.0 305.0 2291264.0 166807.0 ITI ITI1
7494 2021-02-14T17:00:00 ITA 10 Umbria 43.106758 12.388247 451 84 535 7631 ... Si fa presente che 25 dei ricoveri NON UTI, so... 6.0 NaN NaN 40959.0 0.0 647762.0 83904.0 ITI ITI2
7495 2021-02-14T17:00:00 ITA 2 Valle d'Aosta 45.737503 7.320149 13 2 15 122 ... NaN 0.0 NaN NaN 7909.0 0.0 71565.0 2220.0 ITC ITC2
7496 2021-02-14T17:00:00 ITA 5 Veneto 45.434905 12.338452 927 113 1040 23774 ... Nei valori riportati per le terapie intensive ... 8.0 NaN NaN 319142.0 2333.0 3891480.0 682364.0 ITH ITH3

5 rows × 30 columns


 

Variable names to English and their explanation

  • HospWithSymptoms : Currently hospitalized patients with symptoms
  • IC : Intensive care
  • HospTotal: Total number of currently hospitalized patients
  • AtHome : Currently at home confinement
  • CurrentlyPositive : Total amount of current positive cases (Hospitalised patients + Home confinement)
  • NewPositives : New amount of positive cases (Actual total amount of current positive cases - total amount of current positive cases of the previous day)
  • TotalCases : Total amount of positive cases
  • NoOfTests : Tests performed
In [7]:
# Last five rows of the translated, population-enriched frame.
df.tail(5)
Out[7]:
Date Region Lat Long HospWithSymptoms IC HospTotal AtHome CurrentlyPositive VariationOfPositives ... ingr_ter_intens note_test note_casi totale_positivi_test_molecolare totale_positivi_test_antigenico_rapido tamponi_test_molecolare tamponi_test_antigenico_rapido codice_nuts_1 codice_nuts_2 Pop
Date
2021-02-10 2021-02-10 Veneto 45.434905 12.338452 1062 136 1198 24773 25971 -359 ... 13.0 NaN NaN 316746.0 2061.0 3850177.0 602340.0 ITH ITH3 4879133
2021-02-11 2021-02-11 Veneto 45.434905 12.338452 1029 135 1164 24454 25618 -353 ... 8.0 NaN NaN 317321.0 2194.0 3861592.0 622202.0 ITH ITH3 4879133
2021-02-12 2021-02-12 Veneto 45.434905 12.338452 1002 123 1125 24284 25409 -209 ... 8.0 NaN NaN 318009.0 2262.0 3872924.0 650451.0 ITH ITH3 4879133
2021-02-13 2021-02-13 Veneto 45.434905 12.338452 942 111 1053 23876 24929 -480 ... 7.0 NaN NaN 318698.0 2262.0 3883017.0 673721.0 ITH ITH3 4879133
2021-02-14 2021-02-14 Veneto 45.434905 12.338452 927 113 1040 23774 24814 -115 ... 8.0 NaN NaN 319142.0 2333.0 3891480.0 682364.0 ITH ITH3 4879133

5 rows × 29 columns


 

daily numbers & moving averages (MA)

(double click and click on legend to select one or multiple regions in the graph)

In [8]:
# Work on a copy: the following cells add derived columns to df2, and with a
# plain alias (df2 = df) those additions would silently alter `df` as well.
df2 = df.copy()

fig = px.line(df2, x="Date", y="NewPositives", color="Region", hover_name="Region",
              render_mode="svg", log_y=False)
fig.update_layout(title="Daily new positive cases")
fig.show()
In [9]:
# 7-day moving average, computed separately for each region. A plain
# .rolling() over the whole frame lets the window run across region
# boundaries, so the result would depend on the row order of the frame.
df2['MovAv7'] = (
    df2.groupby('Region')['NewPositives']
       .transform(lambda s: s.rolling(window=7).mean())
)

# The first rows of every region have no complete 7-day window yet, hence
# the date filter.
fig = px.line(df2[df2.index > '2020-3-1'], x="Date", y="MovAv7", color="Region",
              hover_name="Region", render_mode="svg", log_y=False)
fig.update_layout(title="7-day MA of new positive cases")
fig.show()
In [10]:
# Daily new positives per 100,000 inhabitants of the region.
df2['NewPos_per_100K'] = df2['NewPositives'] / df2['Pop'] * 100_000

# Smooth with a 7-day moving average, region by region, so that a window
# never mixes values belonging to different regions.
df2['NewPos_per_100K'] = (
    df2.groupby('Region')['NewPos_per_100K']
       .transform(lambda s: s.rolling(window=7).mean())
)

fig = px.line(df2[df2.index > '2020-3-1'], x="Date", y="NewPos_per_100K", color="Region",
              hover_name="Region", log_y=False)
fig.update_layout(title="7-day MA of new positive cases, per 100K")
fig.show()
In [11]:
# NoOfTests (source column 'tamponi') is a running total in the source data,
# whereas NewPositives is a daily figure. Dividing one by the other directly
# does not give a positivity rate, so first derive the tests performed on
# each day, per region.
# NOTE(review): confirm the cumulative nature of 'tamponi' against the
# dataset's field documentation.
daily_tests = df2.groupby('Region')['NoOfTests'].diff()
# Days with zero or negative increments (e.g. retroactive corrections) have
# no meaningful rate; leave them as missing instead of producing inf or
# negative percentages.
daily_tests = daily_tests.where(daily_tests > 0)

df2['PosTests'] = df2['NewPositives'] / daily_tests * 100

fig = px.scatter(df2, y="PosTests", x="Date", color="Region",
                 hover_name="Region", log_y=True)
fig.update_layout(title="Percentage of positive tests")
fig.show()
In [12]:
# Patients currently in intensive care, scaled to 100,000 inhabitants.
df2['IC_per_100K'] = df2['IC'].div(df2['Pop']).mul(100_000)

fig = px.line(
    df2,
    x="Date",
    y="IC_per_100K",
    color="Region",
    hover_name="Region",
    render_mode="svg",
    log_y=False,
)
fig.update_layout(title="Current number of intensive care patients, per 100K")
fig.show()
In [13]:
# Patients currently in hospital, scaled to 100,000 inhabitants.
df2['Hosp_per_100K'] = df2['HospTotal'].div(df2['Pop']).mul(100_000)

fig = px.line(
    df2,
    x="Date",
    y="Hosp_per_100K",
    color="Region",
    hover_name="Region",
    render_mode="svg",
    log_y=False,
)
fig.update_layout(title="Current number of hospitalized, per 100K")
fig.show()
In [14]:
# Separate copy so the daily-deaths column does not end up in df2.
df3 = df2.copy()

# Day-over-day difference of the cumulative death count within each region;
# the first row of every region has no predecessor and stays NaN.
df3['NewDeaths'] = df3.groupby(['Region'])['Deaths'].diff()

fig = px.bar(
    df3,
    x=df3['Date'],
    y="NewDeaths",
    color="Region",
    hover_name="Date",
)
fig.update_layout(title="Daily number of deaths")
fig.show()
In [15]:
# Cumulative deaths, scaled to 100,000 inhabitants.
df2['Deaths_per_100K'] = df2['Deaths'].div(df2['Pop']).mul(100_000)

fig = px.line(
    df2,
    x="Date",
    y="Deaths_per_100K",
    color="Region",
    hover_name="Region",
    render_mode="svg",
    line_shape='spline',
)
fig.update_layout(title="Cumulative number of deaths, per 100K")
fig.show()
In [16]:
# Daily change in currently-positive cases per 100,000 inhabitants.
df2['Change_per_100K'] = df2['VariationOfPositives'] / df2['Pop'] * 100_000

# 7-day moving average taken within each region, so that a window never
# spans two different regions.
df2['Change_per_100K'] = (
    df2.groupby('Region')['Change_per_100K']
       .transform(lambda s: s.rolling(window=7).mean())
)

# Valle d'Aosta is left out of this chart, as stated in the title.
fig = px.line(df2[(df2.index > '2020-3-1') & (df2['Region'] != """Valle d'Aosta""")],
              x='Date', y="Change_per_100K", color="Region", hover_name="Date")
fig.update_layout(title="7-day MA of change in current positive cases, per 100K (excl. Valle d'Aosta)")
fig.show()
In [17]:
# Currently-positive cases per 100,000 inhabitants.
df2['Current_per_100K'] = df2['CurrentlyPositive'] / df2['Pop'] * 100_000

# 14-day moving average taken within each region, so that a window never
# spans two different regions.
df2['Current_per_100K'] = (
    df2.groupby('Region')['Current_per_100K']
       .transform(lambda s: s.rolling(window=14).mean())
)

# Skip the leading dates, which have no complete 14-day window.
fig = px.line(df2[(df2.index > '2020-3-7')], x='Date', y="Current_per_100K",
              color="Region", hover_name="Date")
fig.update_layout(title="14-day MA of current positive cases, per 100K")
fig.show()

 

All regions together

In [18]:
df2 = df

# National totals per day. Only the four plotted measures are summed:
# summing every column would also hit text, note and date columns, which
# newer pandas versions refuse to add up instead of silently dropping them.
NATIONAL_MEASURES = ['NewPositives', 'IC', 'HospTotal', 'CurrentlyPositive']

# df2 carries 'Date' both as its index name and as a column; dropping the
# index first makes groupby('Date') unambiguous.
df_sum = (
    df2.reset_index(drop=True)
       .groupby('Date', as_index=False)[NATIONAL_MEASURES]
       .sum()
)

# Long format (one row per date and measure) so each measure gets its own line.
df_sum2 = pd.melt(df_sum, id_vars=['Date'], value_vars=NATIONAL_MEASURES)

fig = px.line(df_sum2, x="Date", y="value", color='variable', hover_name="value",
              render_mode="svg", log_y=True, line_shape='spline')
fig.update_layout(title="Number of new and current positives, current IC patients and currently hospitalized")
fig.show()

Molecular test positive rate

In [27]:
# Share of molecular tests that were positive, in percent.
# NOTE(review): both source columns look like running totals, which would
# make this a cumulative rate rather than a daily one — confirm.
df2['pos.test.rate.mol'] = df2['totale_positivi_test_molecolare'] / df2['tamponi_test_molecolare'] * 100

# Valle d'Aosta is left out of this chart, as stated in the title.
fig = px.line(df2[(df2.index > '2021-1-10') & (df2['Region'] != """Valle d'Aosta""")],
              x='Date', y="pos.test.rate.mol", color="Region", hover_name="Date")
# The previous title was copied from an unrelated chart (7-day MA of change
# in current positives); it now describes what is actually plotted.
fig.update_layout(title="Molecular test positive rate, % (excl. Valle d'Aosta)")
fig.show()

Antigen test positive rate

In [28]:
# Share of rapid antigen tests that were positive, in percent.
# NOTE(review): both source columns look like running totals, which would
# make this a cumulative rate rather than a daily one — confirm.
df2['pos.test.rate.ant'] = df2['totale_positivi_test_antigenico_rapido'] / df2['tamponi_test_antigenico_rapido'] * 100

# Valle d'Aosta is left out of this chart, as stated in the title.
fig = px.line(df2[(df2.index > '2021-1-10') & (df2['Region'] != """Valle d'Aosta""")],
              x='Date', y="pos.test.rate.ant", color="Region", hover_name="Date")
# The previous title was copied from an unrelated chart (7-day MA of change
# in current positives); it now describes what is actually plotted.
fig.update_layout(title="Rapid antigen test positive rate, % (excl. Valle d'Aosta)")
fig.show()
In [ ]: